
clear all
set more off

//  POPOLAZIONE: MOVIMENTO ANNUALE POPOLAZIONE RESIDENTE         

forvalues t=1995/2012{
insheet using "$rawpop\pop_`t'.txt" , clear
save "$popolazione\pop_`t'.dta" , replace
}

forvalues t=2001/2005{
use "$popolazione\pop_`t'.dta" , clear
rename provincia prov
rename popolazione_finale_maschile pop_fin_mas
rename popolazione_finale_femminile pop_fin_fem
save "$popolazione\pop_`t'.dta" , replace
}
 
 forvalues t=2001/2012{
use "$popolazione\pop_`t'.dta" , clear
rename regione region
rename anno year
rename prov cod_prov 
rename comune cod_mun 
gen mun=string(cod_prov,"%03.0f")+string(cod_mun,"%03.0f")
rename pop_fin_mas  mp 
lab var mp "male population"
rename pop_fin_fem  fp 
lab var fp "female population"
egen tp = rowtotal (mp fp)
lab var tp "total population"
keep region year cod_prov cod_mun mun mp fp tp
save "$data\P_`t'.dta" , replace
 }
 
  forvalues t=1995/2000{
use "$popolazione\pop_`t'.dta" , clear
rename regione region
gen year= `t'
rename prov cod_prov 
rename comune cod_mun 
gen mun=string(cod_prov,"%03.0f")+string(cod_mun,"%03.0f")
rename popolaz31dict tp
lab var tp "total population"
rename popolaz31dicm mp
lab var mp "male population"
gen fp=tp-mp
lab var fp "female population"
keep region year cod_prov cod_mun mun mp fp tp
save "$data\P_`t'.dta" , replace
}

*  Aggregate population files

 use "$data\P_1995.dta" , clear

label define codici_province 001 "Torino" 002 "Vercelli" 003 "Novara" 004 "Cuneo" 005 "Asti" 006 "Alessandria" 096 "Biella" 103 "Verbano-Cusio-Ossola" ///
007 "Valle D'Aosta/Valle d'Aoste" 012 "Varese" 013 "Como" 014 "Sondrio" 015 "Milano" 016 "Bergamo" 017 "Brescia" 018 "Pavia" 019 "Cremona" 020 "Mantova" 097 "Lecco" 098 "Lodi" 108 ///
"Monza e della Brianza" 021 "Bolzano/Bozen" 022 "Trento" 023 "Verona" 024 "Vicenza" 025 "Belluno" 026 "Treviso" 027 "Venezia" 028 "Padova" 029 "Rovigo" 030 "Udine" 031 "Gorizia" ///
032 "Trieste" 093 "Pordenone" 008 "Imperia" 009 "Savona" 010 "Genova" 011 "La Spezia" 033 "Piacenza" 034 "Parma" 035 "Reggio nell'Emilia" 036 "Modena" 037 "Bologna" 038 "Ferrara" ///
039 "Ravenna" 040 "Forli-Cesena" 099 "Rimini" 045 "Massa-Carrara" 046 "Lucca" 047 "Pistoia" 048 "Firenze" 049 "Livorno" 050 "Pisa" 051 "Arezzo" 052 "Siena" 053 "Grosseto" 100 "Prato" ///
054	"Perugia" 055 "Terni" 041 "Pesaro e Urbino" 042	"Ancona" 043	"Macerata" 044	"Ascoli Piceno" 109	"Fermo" 056	"Viterbo" 057	"Rieti" 058	"Roma" 059	"Latina" 060 "Frosinone" ///
066	"L'Aquila" 067	"Teramo" 068	"Pescara" 069	"Chieti" 070	"Campobasso" 094	"Isernia" 061	"Caserta" 062	"Benevento" 063	"Napoli" 064	"Avellino" 065	"Salerno" ///
071 "Foggia" 072	"Bari" 073	"Taranto" 074	"Brindisi" 075	"Lecce" 110	"Barletta-Andria-Trani" 076	"Potenza" 077	"Matera" 078	"Cosenza" 079	"Catanzaro" ///
080	"Reggio di Calabria" 101	"Crotone" 102	"Vibo Valentia" 081	"Trapani" 082	"Palermo" 083	"Messina" 084	"Agrigento" 085	"Caltanissetta" 086	"Enna" 087	"Catania" ///
088	"Ragusa" 089	"Siracusa" 090	"Sassari" 091	"Nuoro" 092	"Cagliari" 095	"Oristano" 104	"Olbia-Tempio" 105	"Ogliastra" 106	"Medio Campidano" 107	"Carbonia-Iglesias"
label define codici_regioni  1 "Piemonte" 2 "Valle D'Aosta" 3 "Lombardia" 4 "Trentino Alto-Adige" 5 "Veneto" 6 "Friuli Venezia Giulia" 7 "Liguria" 8 "Emilia Romagna" ///
9 "Toscana" 10 "Umbria" 11 "Marche" 12 "Lazio" 13 "Abruzzo" 14 "Molise" 15 "Campania" 16 "Puglia" 17 "Basilicata" 18 "Calabria" 19 "Sicilia" 20 "Sardegna" 

 forvalues t=1996/2012{
  append using "$data\P_`t'.dta" 
  } 
  lab values cod_prov codici_province
  lab values region codici_regioni
 
  preserve
  collapse (sum)  mp fp tp , by (year cod_prov region)
  gen flag=1
  bys year region : egen np=sum (flag)
 save "$data\P_datasetprov.dta" , replace
  restore
  
  order year region cod_prov cod_mun mun tp mp fp
  duplicates report
  duplicates tag year region cod_prov cod_mun mun, gen (rep)
  tab rep
  duplicates drop year region cod_prov cod_mun mun, force
  drop rep 
 save "$data\P_dataset.dta" , replace
  
    
  
//  MIGRANTI: MOVIMENTO ANNUALE POPOLAZIONE STRANIERA - SESSO E CITTADINANZA   1998-2010 
              
forvalues t=2002/2012{
insheet using "$rawmig\migsc_`t'.txt" , clear
save "$migranti\migsc_`t'.dta" , replace
}
* 2001 NON AVAILABLE
forvalues t=1995/2000{
insheet using  "$rawmig\migsc_`t'.txt" , clear
save "$migranti\migsc_`t'.dta" , replace
}

* 2005-2010
forval t=2005/2012{

use "$migranti\migsc_`t'.dta" , clear
label define continente 1 "europa" 2 "africa" 3 "asia" 4 "america" 5 "oceania" 6 "apolidi"
label define subcontinente 11 "EU" 12 "EU CE" 13 "EU other" 21 "AfricaN" 22 "AfricaW" ///
             23 "AfricaE" 24 "AfricaCS" 31 "AsiaW" 32 "AsiaS" 33 "AsiaE" 41 "AmericaN" 42 "AmericaCS" 50 "Oceania" 60 "Apolidi"
label define socioeco 1 "paese avanzato" 2 "paese con pressione migratoria"
rename regione region
rename provincia cod_prov
rename comune cod_mun
rename maschi mmn
lab var mmn "migrant male nationality"
rename femmine fmn
lab var fmn "migrant female nationality"
rename totale tmn
lab var tmn "total migrant nationality"
rename paese nationality
rename continente continent
lab values continent continente
rename sub_area farea
lab var farea "foreign area"
lab values farea subcontinente
rename area_socio_econ seco
lab var seco "socio-economic environment"
lab values seco socioeco
rename anno year 
drop qualit_ v13
collapse (sum) mmn fmn tmn , by(nationality region cod_prov continent farea year)
save "$data\MSC_`t'.dta" , replace

}

*2004
use "$migranti\migsc_2004.dta" , clear
label define continente 1 "europa" 2 "africa" 3 "asia" 4 "america" 5 "oceania" 6 "apolidi"
label define subcontinente 11 "EU" 12 "EU CE" 13 "EU other" 21 "AfricaN" 22 "AfricaW" ///
             23 "AfricaE" 24 "AfricaCS" 31 "AsiaW" 32 "AsiaS" 33 "AsiaE" 41 "AmericaN" 42 "AmericaCS" 50 "Oceania" 60 "Apolidi"
label define socioeco 1 "paese avanzato" 2 "paese con pressione migratoria"
rename regione region
rename provincia cod_prov
rename comune cod_mun
rename maschi mmn
lab var mmn "migrant male nationality"
rename femmine fmn
lab var fmn "migrant female nationality"
rename totale tmn
lab var tmn "total migrant nationality"
rename paese_di_cittadinanza nationality
rename continente_appartenenza continent
lab values continent continente
rename sub_area_cont farea
lab var farea "foreign area"
lab values farea subcontinente
rename area_socio_economica_appart seco
lab var seco "socio-economic environment"
lab values seco socioeco
rename anno year 
collapse (sum) mmn fmn tmn , by(nationality region cod_prov continent farea year)
save "$data\MSC_2004.dta" , replace

* farea classification
use "$data\MSC_2010.dta" , clear
keep nationality farea continent
duplicates drop nationality farea continent, force
save "$data\farea.dta" , replace

*2003
use "$migranti\migsc_2003.dta" , clear
label define continente 1 "europa" 2 "africa" 3 "asia" 4 "america" 5 "oceania" 6 "apolidi"
label define subcontinente 11 "EU" 12 "EU CE" 13 "EU other" 21 "AfricaN" 22 "AfricaW" ///
             23 "AfricaE" 24 "AfricaCS" 31 "AsiaW" 32 "AsiaS" 33 "AsiaE" 41 "AmericaN" 42 "AmericaCS" 50 "Oceania" 60 "Apolidi"
label define socioeco 1 "paese avanzato" 2 "paese con pressione migratoria"
rename regione region
rename provincia cod_prov
rename comune cod_mun
rename maschi mmn
lab var mmn "migrant male nationality"
rename femmine fmn
lab var fmn "migrant female nationality"
rename totale tmn
lab var tmn "total migrant nationality"
rename paese_di_cittadinanza nationality
rename anno year 
merge m:1 nationality using "$data\farea.dta"
collapse (sum) mmn fmn tmn , by(nationality region cod_prov continent farea year)
save "$data\MSC_2003.dta" , replace

*2002
use "$migranti\migsc_2002.dta" , clear
label define continente 1 "europa" 2 "africa" 3 "asia" 4 "america" 5 "oceania" 6 "apolidi"
label define subcontinente 11 "EU" 12 "EU CE" 13 "EU other" 21 "AfricaN" 22 "AfricaW" ///
             23 "AfricaE" 24 "AfricaCS" 31 "AsiaW" 32 "AsiaS" 33 "AsiaE" 41 "AmericaN" 42 "AmericaCS" 50 "Oceania" 60 "Apolidi"
label define socioeco 1 "paese avanzato" 2 "paese con pressione migratoria"
rename regione region
rename provincia cod_prov
rename campo_lib cod_mun
drop cod_mun 
rename maschi mmn
lab var mmn "migrant male nationality"
rename femmine fmn
lab var fmn "migrant female nationality"
rename totale tmn
lab var tmn "total migrant nationality"
rename paese nationality
rename continente continent
lab values continent continente
rename sub_area farea
lab var farea "foreign area"
lab values farea subcontinente
rename area_socio_econ seco
lab var seco "socio-economic environment"
lab values seco socioeco
rename anno year 
collapse (sum) mmn fmn tmn , by(nationality region cod_prov continent farea year)
save "$data\MSC_2002.dta" , replace

* 1995-2000
forvalues t=1995/2000{
use "$migranti\migsc_`t'.dta" , clear
label define continente 1 "europa" 2 "africa" 3 "asia" 4 "america" 5 "oceania" 6 "apolidi"
label define subcontinente 11 "EU" 12 "EU CE" 13 "EU other" 21 "AfricaN" 22 "AfricaW" ///
             23 "AfricaE" 24 "AfricaCS" 31 "AsiaW" 32 "AsiaS" 33 "AsiaE" 41 "AmericaN" 42 "AmericaCS" 50 "Oceania" 60 "Apolidi"
label define socioeco 1 "paese avanzato" 2 "paese con pressione migratoria"
rename regione region
rename provincia cod_prov
rename comune cod_mun
rename maschi mmn
lab var mmn "migrant male nationality"
rename femmine fmn
lab var fmn "migrant female nationality"
gen tmn= mmn+fmn
lab var tmn "total migrant nationality"
rename paese_di_cittadinanza nationality
rename continente_di_appartenenza continent
lab values continent continente
rename sub_area_di_continente farea
lab var farea "foreign area"
lab values farea subcontinente
rename area_socio_economica_di_provenie seco
lab var seco "socio-economic environment"
lab values seco socioeco
gen year =`t'
collapse (sum) mmn fmn tmn , by(nationality region cod_prov continent farea year)
save "$data\MSC_`t'.dta" , replace
}


*Generate 2001 as average of 2000-2002

local n 2002
forvalues t=2000(2)`n'{
use "$data\MSC_`t'.dta" , clear
foreach var in mmn fmn tmn year {
rename `var' `var'`t' 
destring `var'`t' , replace
}
* check duplicates on merge variables
duplicates report
duplicates tag region cod_prov  nationality , gen(rep)
tab rep
duplicates drop region cod_prov  nationality , force
save "$data\MSC_merge`t'.dta" , replace
}
use "$data\MSC_merge2000.dta" , clear
merge 1:1 region cod_prov nationality using "$data\MSC_merge2002.dta"
sort _merge
gen year=2001 
egen mmn= rowmean(mmn2000 mmn2002)
egen fmn= rowmean(fmn2000 fmn2002)
egen tmn= rowmean(tmn2000 tmn2002)
replace mmn=mmn2000 if _merge==1
replace mmn=mmn2002 if _merge==2
replace fmn=fmn2000 if _merge==1
replace fmn=fmn2000 if _merge==2
replace tmn=tmn2000 if _merge==1
replace tmn=tmn2000 if _merge==2
egen mmn_med= rowmedian(mmn2000 mmn2002)
egen fmn_med= rowmedian(fmn2000 fmn2002)
egen tmn_med= rowmedian(tmn2000 tmn2002)
foreach var in mmn fmn tmn mmn_med fmn_med tmn_med{
replace `var'=round(`var')
}
gen diff=tmn -(mmn+fmn)
replace tmn=tmn+1 if diff!=0
keep region cod_prov nationality farea  year mmn fmn tmn mmn_med fmn_med tmn_med 
save "$data\MSC_2001.dta" , replace



*   Aggregate dataset 1995-2012

set more off
use "$data\MSC_1995.dta" , clear
forvalues t=1996/2012{
append using "$data\MSC_`t'.dta" , force
}
label define cittadinanza  201 "Albania" 202	"Andorra" 203 "Austria" 206	"Belgio" 209 "Bulgaria" 210 "Ex-cecoslovacchia" 212	"Danimarca" 214	"Finlandia" 215	"Francia" 216 "Germania" 219 "Regno Unito" ///	
220	"Grecia" 221 "Irlanda" 223 "Islanda" 225 "Liechtenstein" 226 "Lussemburgo" 227	"Malta" 229	"Monaco" 231 "Norvegia" 232	"Paesi Bassi" 233 "Polonia" 234	"Portogallo" ///	
235	"Romania" 236 "San Marino" 239	"Spagna" 240 "Svezia" 241 "Svizzera" 243 "Ucraina" 244	"Ungheria" 245	"Russa,Federazione" 246	"Santa Sede" 247 "Estonia" 248 "Lettonia" ///
249	"Lituania" 250 "Croazia" 251 "Slovenia" 252	"Bosnia-Erzegovina" 253	"Macedonia,ex Repubblica Jugoslava di" 254 "Moldova" 255 "Slovacchia" 256 "Bielorussia" 257	"Ceca,Repubblica" ///	
270	"Montenegro" 271 "Serbia, Repubblica di" 301 "Afghanistan" 302 "Arabia Saudita" 304	"Bahrein" 305 "Bangladesh" 306 "Bhutan" 307	"Myanmar (ex Birmania)" 309	"Brunei" ///
310	"Cambogia" 311 "Sri Lanka (ex Ceylon)" 314	"Cinese, Repubblica Popolare" 315 "Cipro" 319 "Corea, Repubblica Popolare Democratica (Corea del Nord)" ///
320	"Corea, Repubblica (Corea del Sud)" 322	"Emirati Arabi Uniti" 323	"Filippine" 324	"Territori dell'Autonomia Palestinese" 	326	"Giappone" 	327	"Giordania" 330	"India" ///	
331	"Indonesia" 332	"Iran, Repubblica Islamica del" 333	"Iraq" 334	"Israele" 335 "Kuwait" 336 "Laos" 337 "Libano" 338 "Timor Orientale" 339 "Maldive" 340 "Malaysia" 341 "Mongolia" ///	
342	"Nepal" 343	"Oman" 344 "Pakistan" 345 "Qatar" 346	"Singapore" 348	"Siria" 349	"Thailandia" 351 "Turchia" 353 "Vietnam" 354 "Yemen" 356 "Kazakhstan" 357 "Uzbekistan" ///	
358	"Armenia" 359 "Azerbaigian" 360	"Georgia" 361 "Kirghizistan" 362 "Tagikistan" 363 "Taiwan (ex Formosa)" 364	"Turkmenistan" 401 "Algeria" 402 "Angola" 404 "Costa d'Avorio" ///	
406	"Benin (ex Dahomey)" 408 "Botswana" 409	"Burkina Faso (ex Alto Volta)" 410	"Burundi" 411 "Camerun" 413	"Capo Verde" 414 "Centrafricana, Repubblica" 415 "Ciad" 417	"Comore" ///	
418	"Congo (Repubblica del)" 419 "Egitto" 420 "Etiopia" 421	"Gabon" 422	"Gambia" 423 "Ghana" 424 "Gibuti" 425 "Guinea" 426 "GuineaBissau" 427 "Guinea Equatoriale" 428	"Kenya" ///	
429	"Lesotho" 430	"Liberia" 431 "Libia" 432 "Madagascar" 434	"Malawi" 435 "Mali" 436	"Marocco" 437	"Mauritania" 438 "Mauritius" 440 "Mozambico" 441 "Namibia" 442	"Niger" ///	
443	"Nigeria" 446 "Ruanda" 448	"Sao Tome e Principe" 449 "Seychelles" 450	"Senegal" 451 "Sierra Leone" 453 "Somalia" 454	"Sud Africa" 455 "Sudan" 456 "Swaziland" 457 "Tanzania" ///	
458	"Togo" 460	"Tunisia" 461 "Uganda" 463	"Congo, Repubblica democratica del (ex Zaire)" 464 "Zambia" 465	"Zimbabwe (ex Rhodesia)" 466 "Eritrea" 503 "Antigua e Barbuda" ///	
505	"Bahamas" 506 "Barbados" 507 "Belize" 509 "Canada" 513	"Costa Rica" 514 "Cuba" 515	"Dominica" 516	"Dominicana, Repubblica" 517 "El Salvador" 518	"Giamaica" 519 "Grenada" ///	
523	"Guatemala" 524	"Haiti" 525	"Honduras" 527	"Messico" 529 "Nicaragua" 530 "Panama" 532	"Saint Lucia" 533 "Saint Vincent e Grenadine" 534 "Saint Kitts e Nevis" ///
536	"Stati Uniti d'America" 602	"Argentina" 604	"Bolivia" 605	"Brasile" 606 "Cile" 608 "Colombia" 609	"Ecuador" 612 "Guyana" 614	"Paraguay" 615	"Peru" 616 "Suriname" ///	
617	"Trinidad e Tobago" 618	"Uruguay" 619 "Venezuela" 701 "Australia" 703 "Figi" 708 "Kiribati" 712	"Marshall, Isole" 713 "Micronesia, Stati Federati" 715 "Nauru" 719 "Nuova Zelanda" ///	
720	"Palau" 721	"Papua Nuova Guinea" 725 "Salomone, Isole" 727	"Samoa" 730	"Tonga" 731	"Tuvalu" 732 "Vanuatu" 999 "APOLIDE" 888 "altro" 777 "non indicato"
lab values nationality cittadinanza
lab values cod_prov codici_province
order year region cod_prov nationality continent farea
keep  year region cod_prov nationality farea mmn fmn tmn mmn_med fmn_med tmn_med
save "$data\MSC_dataset.dta" , replace



//  MIGRANTI: MOVIMENTO ANNUALE POPOLAZIONE STRANIERA  BILANCIO ANNUALE   

forvalues t=2002/2012{
insheet using "$rawmig\migb_`t'.txt" , clear
save "$migranti\migb_`t'.dta" , replace
}
*2001 NOT AVAILABLE
forvalues t=1995/2000{
insheet using "$rawmig\migb_`t'.txt" , clear
save "$migranti\migb_`t'.dta" , replace
}

* change some variables years 1998-2006
forval t=2005/2006{
use "$migranti\migb_`t'.dta" , clear
   rename maschi_restran m_popfin
   rename femmine_restran f_popfin
   rename m_f_restran m_f_popfin
   rename maschi_min m_minsti
   rename femmine_min f_minsti
   rename m_f_min m_f_minsti
save "$migranti\migb_`t'.dta" , replace
}
use "$migranti\migb_2004.dta" , clear   
   rename popolazione_straniera_resid__mas m_popfin
   rename popolazione_straniera_resid__fem f_popfin
   rename popolazione_straniera_resid__tot m_f_popfin
   rename di_cui__minorenni_maschi m_minsti
   rename di_cui__minorenni_femmine f_minsti
   rename di_cui__minorenni_totale m_f_minsti
   rename denominazione_comune com
save "$migranti\migb_2004.dta" , replace

use "$migranti\migb_2003.dta" , clear   
   rename popolazione_straniera_resid__mas m_popfin
   rename popolazione_straniera_resid__fem f_popfin
   rename popolazione_straniera_resid__tot m_f_popfin
   rename di_cui__minorenni_maschi m_minsti
   rename di_cui__minorenni_femmine f_minsti
   rename di_cui__minorenni_totale m_f_minsti
   rename denominazione_comune com
   gen anno=2003 
save "$migranti\migb_2003.dta" , replace

use "$migranti\migb_2002.dta" , clear
   rename pop_stran_m m_popfin
   rename pop_stran_f f_popfin
   rename tot_pop_stran m_f_popfin
   rename minor_m m_minsti
   rename minor_f f_minsti
   rename minor m_f_minsti
save "$migranti\migb_2002.dta" , replace

forvalues t=1998/2000{
use "$migranti\migb_`t'.dta" , clear 
   rename popolaz__mas__straniera_resident m_popfin
   rename popolaz__fem__straniera_resident f_popfin
   rename popolaz__tot__straniera_resident m_f_popfin
   rename pop__mas__minorenne_stran__resid m_minsti
   rename pop__fem__minorenne_stran__resid f_minsti
   rename pop__tot__minorenne_stran__resid m_f_minsti
   rename denominazione_comune com
   gen anno=`t'
save "$migranti\migb_`t'.dta" , replace
}

forvalues t=1995/2000{
 use "$migranti\migb_`t'.dta" , clear
 rename regione region
 rename anno year
 rename prov cod_prov 
 rename com cod_mun 
 rename m_popfin  mm
 lab var mm "male migrant population"
 rename f_popfin  fm 
 lab var fm "female migrant population"
 rename m_f_popfin tm
 lab var tm "total migrant population"
 rename m_minsti mm_less18
 lab var mm_less18 "male minor migrants"
 rename f_minsti fm_less18
 lab var fm_less18 "female minor migrants"
 rename m_f_minsti tm_less18
 lab var tm_less18 "total minor migrants"
 rename comune COMUNE
 collapse (sum) mm fm tm , by( region cod_prov year)
 keep region year cod_prov mm fm tm 
 save "$data\MIGB_`t'.dta" , replace
 }
 
 forvalues t=2002/2012{
 use "$migranti\migb_`t'.dta" , clear
 rename regione region
 rename anno year
 rename prov cod_prov 
 rename comune cod_mun 
 rename m_popfin  mm
 lab var mm "male migrant population"
 rename f_popfin  fm 
 lab var fm "female migrant population"
 rename m_f_popfin tm
 lab var tm "total migrant population"
 rename m_minsti mm_less18
 lab var mm_less18 "male minor migrants"
 rename f_minsti fm_less18
 lab var fm_less18 "female minor migrants"
 rename m_f_minsti tm_less18
 lab var tm_less18 "total minor migrants"
 collapse (sum) mm fm tm , by( region cod_prov year)
 keep region year cod_prov mm fm tm 
 save "$data\MIGB_`t'.dta" , replace
 }
 
// Generate 2001 as mean 2000-2002

forvalues t=2000(2)2002{
use "$data\MIGB_`t'.dta" , clear
foreach var in mm fm tm year {
rename `var' `var'`t' 
destring `var'`t' , replace
}
* check duplicates on merge variables
duplicates report
duplicates tag region cod_prov   , gen(rep)
duplicates drop region cod_prov   , force
save "$data\MIGB_merge`t'.dta" , replace
}
use "$data\MIGB_merge2000.dta" , clear
merge 1:1 region cod_prov using "$data\MIGB_merge2002.dta"
sort _merge
gen year=2001 
foreach var in mm fm tm  {
egen `var'= rowmean(`var'2000 `var'2002)
egen `var'_med= rowmedian(`var'2000 `var'2002)
replace `var'=round(`var')
replace `var'_med=round(`var'_med)
}
keep region cod_prov year mm fm tm mm_med fm_med tm_med 
save "$data\MIGB_2001.dta" , replace


* Aggregate dataset

use "$data\MIGB_1995.dta" , clear
forvalues t=1996/2012{
append using "$data\MIGB_`t'.dta"
}
label define codici_province 001 "Torino" 002 "Vercelli" 003 "Novara" 004 "Cuneo" 005 "Asti" 006 "Alessandria" 096 "Biella" 103 "Verbano-Cusio-Ossola" ///
007 "Valle D'Aosta/Valle d'Aoste" 012 "Varese" 013 "Como" 014 "Sondrio" 015 "Milano" 016 "Bergamo" 017 "Brescia" 018 "Pavia" 019 "Cremona" 020 "Mantova" 097 "Lecco" 098 "Lodi" 108 ///
"Monza e della Brianza" 021 "Bolzano/Bozen" 022 "Trento" 023 "Verona" 024 "Vicenza" 025 "Belluno" 026 "Treviso" 027 "Venezia" 028 "Padova" 029 "Rovigo" 030 "Udine" 031 "Gorizia" ///
032 "Trieste" 093 "Pordenone" 008 "Imperia" 009 "Savona" 010 "Genova" 011 "La Spezia" 033 "Piacenza" 034 "Parma" 035 "Reggio nell'Emilia" 036 "Modena" 037 "Bologna" 038 "Ferrara" ///
039 "Ravenna" 040 "Forli-Cesena" 099 "Rimini" 045 "Massa-Carrara" 046 "Lucca" 047 "Pistoia" 048 "Firenze" 049 "Livorno" 050 "Pisa" 051 "Arezzo" 052 "Siena" 053 "Grosseto" 100 "Prato" ///
054	"Perugia" 055 "Terni" 041 "Pesaro e Urbino" 042	"Ancona" 043	"Macerata" 044	"Ascoli Piceno" 109	"Fermo" 056	"Viterbo" 057	"Rieti" 058	"Roma" 059	"Latina" 060 "Frosinone" ///
066	"L'Aquila" 067	"Teramo" 068	"Pescara" 069	"Chieti" 070	"Campobasso" 094	"Isernia" 061	"Caserta" 062	"Benevento" 063	"Napoli" 064	"Avellino" 065	"Salerno" ///
071 "Foggia" 072	"Bari" 073	"Taranto" 074	"Brindisi" 075	"Lecce" 110	"Barletta-Andria-Trani" 076	"Potenza" 077	"Matera" 078	"Cosenza" 079	"Catanzaro" ///
080	"Reggio di Calabria" 101	"Crotone" 102	"Vibo Valentia" 081	"Trapani" 082	"Palermo" 083	"Messina" 084	"Agrigento" 085	"Caltanissetta" 086	"Enna" 087	"Catania" ///
088	"Ragusa" 089	"Siracusa" 090	"Sassari" 091	"Nuoro" 092	"Cagliari" 095	"Oristano" 104	"Olbia-Tempio" 105	"Ogliastra" 106	"Medio Campidano" 107	"Carbonia-Iglesias"
label define codici_regioni  1 "Piemonte" 2 "Valle D'Aosta" 3 "Lombardia" 4 "Trentino Alto-Adige" 5 "Veneto" 6 "Friuli Venezia Giulia" 7 "Liguria" 8 "Emilia Romagna" ///
9 "Toscana" 10 "Umbria" 11 "Marche" 12 "Lazio" 13 "Abruzzo" 14 "Molise" 15 "Campania" 16 "Puglia" 17 "Basilicata" 18 "Calabria" 19 "Sicilia" 20 "Sardegna" 
lab values cod_prov codici_province
lab values region codici_regioni
 
global varlist mm fm tm  mm_med fm_med tm_med 
order year region  cod_prov $varlist
duplicates report
duplicates tag region cod_prov  year , gen(rep)
tab rep
duplicates drop region cod_prov  year , force 
drop rep
keep  year region cod_prov mm fm tm mm_med fm_med tm_med
save "$data\MIGB_dataset.dta" , replace
 

 
 
* Generate population vector by origin, mun (prov), year
* 1. Merge population data (aggregate data) with migrant bilancio data (aggregate)

use "$data\MIGB_dataset.dta" , clear
preserve
keep year cod_prov tm
save "$data\migrate_bynationality.dta" , replace
restore
merge 1:1 region cod_prov year using "$data\P_datasetprov.dta"
tab _merge
drop _merge
gen ti= tp-tm
lab var ti "total italian population"
gen mi= mp-mm
lab var mi "male italian population"
gen fi= fp-fm
lab var fi "female italian population"
order year region cod_prov tp mp fp mi fi ti 
keep year region cod_prov tp mp fp mi fi ti 
rename mi mmn
rename fi fmn
rename ti tmn
gen nationality=998 // Italian 
save "$data\PI_dataset.dta" , replace

 
* 2. Append PI_data with migration (sex and citizenship) data
* Complete vector of pop my nationality by mun, year (including italian)

use "$data\MSC_dataset.dta" , clear
append using "$data\PI_dataset.dta" 
order year region cod_prov  nationality
* Change provinces modified over time
recode cod_prov ( 108 = 15) // change monza-brianza in MILANO
recode cod_prov ( 110 = 72) // change barletta-andria-trani e foggia in BARI 
recode cod_prov  ( 71 = 72)  
recode cod_prov ( 91 = 92 ) // change oristano, nuoro, olbia-tempio, ogliastra, medio-campidano, carbonia-iglesias in CAGLIARI
recode cod_prov ( 95 = 92 )
recode cod_prov ( 104 = 92 )
recode cod_prov ( 105 = 92 )
recode cod_prov ( 106 = 92 )
recode cod_prov ( 107 = 92 )
recode cod_prov ( 99 = 41) // change rimini in pesaro
recode cod_prov ( 109 = 44) // change fermo in ascoli-piceno
collapse (sum) tmn mmn fmn tp mp fp, by(cod_prov year nationality)

bys year cod_prov  : egen TP= max(tp)
bys year cod_prov  : egen MP= max(mp)
bys year cod_prov  : egen FP= max(fp)
gen qt_ny=  (tmn/TP)*100
lab var qt_ny "perc. tot migrant by nationality-prov-year"
gen qm_ny=  (mmn/MP)*100
lab var qm_ny "perc. male migrant by nationality-prov-year"
gen qf_ny=  (fmn/FP)*100
lab var qf_ny "perc. female migrant by nationality-prov-year"


* Generate vector Q by j-ethnic groups, by mun and year
* Include different ethnic group classifications

cap drop farea7 farea
drop if nationality==999
rename nationality nat
merge m:1 nat using "$data\CodiciStatiEsteri.dta" , keep(master match) keepusing(nat farea EUII)
tab _merge
drop _merge
replace EUII=4 if nat==224 
replace EUII=4 if nat==258  
replace EUII=4 if nat==259  
replace EUII=4 if nat==260  
replace EUII=4 if nat==261  
replace EUII=6 if nat==317  
tab nat if EUII==.
save "$data\Q_data_country.dta", replace


use "$data\Q_data_country.dta" , clear
recode nationality (998=0) 

preserve
collapse (mean) TP , by(cod_prov)
rename TP population 
rename cod_prov provres
save "$data\population_byprovince" , replace
restore 

preserve
keep year region cod_prov nationality qm_ny qt_ny
rename cod_prov provres
rename nationality hbc
rename qm_ny qmny_h 
rename qt_ny qtny_h
save "$data\Q_merge_male.dta", replace
restore

keep year region cod_prov nationality qt_ny qf_ny
rename cod_prov provres
rename nationality wbc
rename qf_ny qfny_w
rename qt_ny qtny_w
save "$data\Q_merge_female.dta", replace







